# This file calculates education years by country of birth and immigration status 
library(data.table)
library(dplyr)
# Load the analysis cohort.
cohort <- fread("Derived Data/cohort_2025.csv")

# Country crosswalk; joined onto the cohort below through its `Code` column.
country <- read.csv(
  "H:/Zheng_10223/Joint/countryfiles/country_crosswalk.csv",
  stringsAsFactors = FALSE
)

# Main-parent schooling indicators: 1 when the threshold is reached, 0 when it
# is not, and NA when years of schooling is missing.
cohort$atleasthighschool <- ifelse(
  cohort[["YEARS_OF_SCHOOLING_MainParent"]] >= 12, 1, 0
)
cohort$atleastcollege <- ifelse(
  cohort[["YEARS_OF_SCHOOLING_MainParent"]] >= 16, 1, 0
)

# Left join: every cohort row is kept, and crosswalk columns are attached by
# matching `birthcountry_child` to the crosswalk's `Code`.
cohort <- merge(
  cohort, country,
  by.x = "birthcountry_child", by.y = "Code",
  all.x = TRUE
)

# US children: where are their parents from?
# NOTE(review): the literal "United States " keeps its trailing space exactly
# as written originally; presumably it matches the crosswalk spelling -- confirm.

# Rows with refugeein == 1, tabulated by country of last residence.
dfus_refugee <- cohort[
  cohort$Country == "United States " & cohort$refugeein == 1,
]
dftab_refugee <- dfus_refugee %>%
  group_by(CountryOfLastResidence) %>%
  summarize(count = n())

# Rows with refugeein == 0, tabulated the same way.
# Fix: this filter used to reference `Countryd`, a misspelling of the `Country`
# column used for the refugee subset above; the misspelled name evaluates to
# NULL, so the subset came back empty.
dfus_nonrefugee <- cohort[
  cohort$Country == "United States " & cohort$refugeein == 0,
]
dftab_nonrefugee <- dfus_nonrefugee %>%
  group_by(CountryOfLastResidence) %>%
  summarize(count = n())

# `dftab` previously held each table in turn, so the refugee table was lost and
# `dftab` ended up as the non-refugee table. Keep that final value for any code
# that still reads `dftab`.
dftab <- dftab_nonrefugee





# Refugees by country: row count plus the mean of the high-school and college
# indicators (missing indicator values are ignored in the means).
dfrefugee <- cohort %>%
  filter(ImmigrationCategory == "Refugee") %>%
  group_by(Country) %>%
  summarise(
    count = n(),
    highschoolatleast = mean(atleasthighschool, na.rm = TRUE),
    collegeatleast = mean(atleastcollege, na.rm = TRUE)
  )

# Every other immigration category, summarised the same way and ordered from
# the largest country group to the smallest.
dfnonrefugee <- cohort %>%
  filter(ImmigrationCategory != "Refugee") %>%
  group_by(Country) %>%
  summarise(
    count = n(),
    highschoolatleast = mean(atleasthighschool, na.rm = TRUE),
    collegeatleast = mean(atleastcollege, na.rm = TRUE)
  ) %>%
  arrange(desc(count))




########### Get modal landing year by country

# Number of refugee rows per (country, main-parent landing year).
landyear_refugee <- cohort %>%
  filter(ImmigrationCategory == "Refugee") %>%
  group_by(Country, LandingYear_MainParent) %>%
  summarize(count = n()) %>%
  ungroup()

# Keep exactly one modal landing year per country.
# Fix: when several years tied for the highest count, every tied year was kept,
# and the merge below then repeated that country once per tied year in the
# output table. Ties now resolve to the earliest tied year (arrange() sorts NA
# years last, so a real year wins over a missing one).
landyear_refugee <- landyear_refugee %>%
  arrange(Country, LandingYear_MainParent) %>%
  group_by(Country) %>%
  mutate(maxlandyear = max(count)) %>%
  filter(maxlandyear == count) %>%
  slice(1) %>%
  ungroup()

# Attach the modal year to the per-country refugee summary and rank countries
# by number of rows, largest first.
dfrefugee <- merge(
  dfrefugee,
  landyear_refugee[, c("LandingYear_MainParent", "Country")],
  by = "Country", all.x = TRUE
)
dfrefugee <- dfrefugee %>% arrange(desc(count))

# head() instead of [1:30, ]: with fewer than 30 countries the index form pads
# the output with all-NA rows.
# NOTE(review): the file name says "top25" but 30 rows are written -- confirm
# which is intended.
write.csv(head(dfrefugee, 30), "H:/Zheng_10223/ToVet/Output/refugeetop25.csv")


# Number of non-refugee rows per (country, main-parent landing year).
landyear_nonrefugee <- cohort %>%
  filter(ImmigrationCategory != "Refugee") %>%
  group_by(Country, LandingYear_MainParent) %>%
  summarize(count = n()) %>%
  ungroup()

# Keep exactly one modal landing year per country.
# Fix: when several years tied for the highest count, every tied year was kept,
# and the merge below then repeated that country once per tied year in the
# output table. Ties now resolve to the earliest tied year (arrange() sorts NA
# years last, so a real year wins over a missing one).
landyear_nonrefugee <- landyear_nonrefugee %>%
  arrange(Country, LandingYear_MainParent) %>%
  group_by(Country) %>%
  mutate(maxlandyear = max(count)) %>%
  filter(maxlandyear == count) %>%
  slice(1) %>%
  ungroup()

# Attach the modal year to the per-country non-refugee summary and rank
# countries by number of rows, largest first.
dfnonrefugee <- merge(
  dfnonrefugee,
  landyear_nonrefugee[, c("LandingYear_MainParent", "Country")],
  by = "Country", all.x = TRUE
)
dfnonrefugee <- dfnonrefugee %>% arrange(desc(count))

# head() instead of [1:30, ]: with fewer than 30 countries the index form pads
# the output with all-NA rows.
# NOTE(review): the file name says "top25" but 30 rows are written -- confirm
# which is intended.
write.csv(head(dfnonrefugee, 30), "H:/Zheng_10223/ToVet/Output/nonrefugeetop25.csv")
